#coding=utf8

import jieba
import jieba.analyse
import cPickle

import pkg_resources


# Path of the pickled topic collection shipped as package data under zhihu/.
topic_set_name = pkg_resources.resource_filename(__name__,'zhihu/topic_set.pkl')

# Pickle data is binary: read it in 'rb' mode so no newline translation can
# corrupt it, and use a context manager so the file handle is closed as soon
# as loading finishes instead of being leaked.
# NOTE(review): unpickling can execute arbitrary code; this is acceptable only
# because the file is bundled with the package rather than user-supplied.
with open(topic_set_name, 'rb') as _topic_set_file:
	topic_set = cPickle.load(_topic_set_file)

# Make jieba's keyword extraction use the IDF file bundled with this package.
jieba.analyse.set_idf_path(pkg_resources.resource_filename(__name__,'zhihu/idf-zhihu.txt.big'))

def get_theme(text):
	"""Return a list of theme keywords for *text*.

	Keywords are extracted with ``jieba.analyse.extract_tags`` and
	de-duplicated. When at least three of them also appear in the
	module-level ``topic_set``, only those matching keywords are returned;
	otherwise every extracted keyword is returned.

	The result is built from a set, so its order is not meaningful.
	"""
	keywords = set(jieba.analyse.extract_tags(text))
	matched = keywords & topic_set
	# Enough known topics were found: restrict the result to them.
	if len(matched) >= 3:
		return list(matched)
	# Too few matches: fall back to all extracted keywords.
	return list(keywords)


